#!/usr/bin/env ruby

LKP_SRC ||= ENV['LKP_SRC'] || File.dirname(__dir__)

require "#{LKP_SRC}/lib/cci"
require "#{LKP_SRC}/lib/hash"
require "#{LKP_SRC}/lib/stats"
require 'json'

KEYWORD = %w[
  suite
  os
  arch
  category
  job_state
  job_health
  job_stage
  tbox_group
  upstream_repo
  summary.success
  summary.any_fail
  summary.any_error
  summary.any_stderr
  summary.any_warning
].freeze

def generate_list(uniq)
  arr = uniq.split(',')
  if arr.size != 2
    raise ArgumentError.new("Wrong params of uniq/group_merge, please enter the correct argument. The correct argument should be like 'last,max'.")
  end
  arr
end

def generate_group_list(group_by)
  arr = group_by.split(',')
  arr
end

class Jobs

  attr_reader :es_response
  attr_reader :jobs
  attr_reader :group_jobs
  
  def initialize(query_hash)    
    @es_response = es_opendistro_query(query_hash)
    format_es_to_jobs()
  end

  #es_response{}=>jobs[]
  def format_es_to_jobs()
    es_response = @es_response if es_response.nil?
    results = []
    es_response['hits']['hits'].each do |source|
      results << source['_source']
    end
    die('query results is empty') if results.empty?
    return @jobs=results
  end

  #对应 --show_type <es|json|array|group> 进行对应输出
  def format_data_print(type, select_field)   #select_field是可选字段
    case type
    when 'es'
      puts JSON.pretty_generate(@es_response)
    when 'json'
      puts JSON.pretty_generate(@jobs)
    when 'array'
      format_print(@jobs,select_field)
    when 'group'
      puts JSON.pretty_generate(@group_jobs)
    else
      puts 'Wrong show_type'
    end
  end


  def sortjobs(field, is_asc)
  #对jobs[]进行排序，is_asc为真则为升序排序，否则为倒序
    if is_asc
      @jobs.sort_by!{|job| job[field]}
    else
      @jobs.sort_by!{|job| job[field].reverse}
    end
  end

 
  def sortgroup_jobs(group_fields, field, is_asc)
  #对group_jobs[][]进行排序，若group_fields中含有field说明组间排序，否则组内排序
    if group_fields.include? field
      if is_asc
        @group_jobs=@group_jobs.sort{|a,b| a[1][0][field]<=>b[1][0][field]}
      else
        @group_jobs=@group_jobs.sort{|a,b| b[1][0][field]<=>a[1][0][field]}
      end
    else
      @group_jobs.each do |group_job| 
        if is_asc
          group_job.last.sort_by!{|job| job[field]}
        else
          group_job.last.sort_by!{|job| job[field].reverse}
        end
      end
    end
  end

  def sortbyfield(group_fields, field, is_asc, show_type)
    if show_type=="group"
      sortgroup_jobs(group_fields,field,is_asc) 
    else
      sortjobs(field,is_asc)
    end
  end

  # 统计显示各类errid及其最后一个result_root
  def account_show_errid()
    # errids结构如下：
    # errids{
    #   errid=>{
    #     count
    #     result_root
    #   }
    #   ...
    # }
    errids = {}

    # 将jobs内的错误信息提取，存放到errids中
    @jobs.each do |job|
      if job.has_key?("errid")
        value=job["errid"]
        value.each do |errid|
          errids[errid]||={}
          errids[errid]['count']||=1
          errids[errid]['count']+=1
          errids[errid]['result_root'] = job['result_root']
        end
      end
    end
  
    # 对errids排序，将count多的errid排在前面
    errids=errids.sort_by { |k, v| -v['count'] }

    # 遍历errids，输出结果
    # 样式如下：
    #   8       last_state.exit_fail
    #   /srv/result/iozone/2023-01-31/vm-2p8g/centos-7.6.1810-aarch64/1-xfs-kyber-64k-4g-writereadrand_rw/crystal.6654547
    #
    #   5   stderr.Dload_Upload_Total_Spent_Left_Speed
    #   /srv/result/borrow/2023-01-31/vm-2p8g/openeuler-20.03-SP1-aarch64/10/crystal.6654492
    #
    #   5   stderr.%Total%Received%Xferd_Average_Speed_Time_Time_Time_Current
    #   /srv/result/borrow/2023-01-31/vm-2p8g/openeuler-20.03-SP1-aarch64/10/crystal.6654492
    errids.each do |errid, value|
      printf "%d    %s\n%s\n\n", value['count'], errid, value['result_root']
    end
  end

  # 对传入的jobs进行去重，并根据传入的参数，重新生成jobs来取代原有的jobs
  def uniq(options = nil)
    # same_jobs用来对数据进行查重
    same_jobs = {}
    # new_jobs用来取代原有jobs
    new_jobs = []

    # 将jobs重新写入到same_jobs
    @jobs.each do |job|
      all_params = job['all_params']
      same_jobs[all_params] ||= []
      same_jobs[all_params].push(job)
    end

    # 根据选项，对指定的字段进行计算，并将结果写入到new_jobs中
    same_jobs.each do |all_params, jobs_list|
      # dataMap{
      #   field=>data[]
      # }
      dataMap = {}
      # 将一组jobs的值存放到dataMap
      jobs_list.each do |job|
        job.each do |field, value|
          dataMap[field] ||= []
          dataMap[field].push(value)
        end
      end
      # 用dataMap中的值构造new_job
      new_job = {}
      dataMap.each do |field, datas|
        new_job[field] = calculate(datas, options)
      end
      # 将new_job存放到new_jobs
      new_jobs.push(new_job)
    end

    # 用new_jobs取代jobs并返回,jobs原结构不变
    @jobs = new_jobs
  end
  
  # 按照指定的group_fields，对jobs进行分组，并根据传入的参数，对数据进行修改
  def group(group_fields, options = nil )
    # group_jobs用来存放分组后的jobs
    # group_jobs{
    #   group_field=>[job]
    # }
    group_jobs = {}
  
    # 对jobs进行分组
    @jobs.each do |job|
      # 根据指定的group_fields，重新生成唯一的group_field
      group_field = group_fields.collect { |field| job[field] }.join('-')
      group_jobs[group_field] ||= []
      group_jobs[group_field].push(job)
    end
  
    # 根据选项，对指定的字段进行计算
    group_jobs.each do |group_field, jobs|
      # dataMap{
      #   field=>data[]
      # }
      dataMap = {}
      # 将一组jobs的值存放到dataMap
      jobs.each do |job|
        job.each do |field, value|
          dataMap[field] ||= []
          dataMap[field].push(value)
        end
      end
      # 用dataMap中的值构造new_job
      new_job = {}
      dataMap.each do |field, datas|
        new_job[field] = calculate(datas, options)
      end
      # 将new_job存放到group_jobs
      group_jobs[group_field] = [].push(new_job)
    end
    @group_jobs=group_jobs
  end
  

  def calculate(datas, options)

    if options==nil
      options=['avg','last']
    end

    option=nil
  
    if datas[0].is_a?(Integer)
      option=options[0]
    else
      option=options[1]
    end

    data_return = nil
  
    # 求一组数据的平均值
    if (option == 'avg')
      sum = 0
      datas.each do |data|
        sum = sum + data
      end
      data_return = sum / datas.length
  
      # 求一组数据中的最小值
    elsif (option == 'min')
      min = 2 ** 30 - 1
      datas.each do |data|
        if (min > data)
          min = data
        end
      end
      data_return = min
  
      # 求一组数据中的最大值
    elsif (option == 'max')
      max = -2 ** 30
      datas.each do |data|
        if (max < data)
          max = data
        end
      end
      data_return = max
  
      # 求一组数据的总值
    elsif (option == 'sum')
      sum = 0
      datas.each do |data|
        sum = sum + data
      end
      data_return = sum
  
      # 求一组数据中值的个数
    elsif (option == 'count')
      data_return = datas.length
  
      # 求一组数据中第一个值
    elsif (option == 'first')
      data_return = datas.first
  
      # 求一组数据中最后一个值
    elsif (option == 'last')
      data_return = datas.last
  
    elsif (option == 'stddev')
      # 求平均值
      sum = 0
      datas.each do |data|
        sum = sum + data
      end
      avg = sum / datas.length
      # 求标准差
      datas.collect { |data| (data - avg) ** 2 }
      sum = 0
      datas.each do |data|
        sum = sum + data
      end
      data_return = (sum / datas.length) ** 0.5
    else
      data_return = nil
    end
  
    data_return

  end

  def get_suite_kpi(suite)
    kpi_array = Array.new
    program = YAML.load(File.open("#{LKP_SRC}/programs/#{suite}/meta.yaml"))
    program['results'].each do |key,value|
      kpi_array << key if value['kpi']==1
    end
    return kpi_array
  end

  def filter_stats_kpi()
    @jobs.each do |job|
      suite = job['suite']
      stats = job['stats']
      return if suite.nil?
      next if stats.nil?
      kpi_array = get_suite_kpi(suite)
      job['stats'].delete_if{ |key,value| !kpi_array.include?(key.split('.')[-1])}
    end
  end

  def flat_hash(old_hash, new_hash)
    old_hash.each do |key1, value1|
      if value1.is_a?(Hash)
        next if value1.empty?
        temp_hash = Hash.new
        value1.each do |key2, value2|
          temp_hash.merge!({"#{key1}.#{key2}" => value2})
        end
        flat_hash(temp_hash, new_hash)
       else
         new_hash.merge!({ key1 => value1 })
       end
    end
  end

  def flat_jobs()
    @jobs.each do |job|
      new_job = Hash.new
      flat_hash(job,new_job)
      job.replace(new_job)
    end
  end
  
  #移植原es-jobs.rb中es_jobs类的函数

  #统计总的jobs状态：将每个job依据不同的stat关键字分类
  def set_job_summary(stats, job)
    summary_result = ''
    stats.each_key do |stat|
      # "stderr.linux-perf": 1,
      # "stderr.error:target_not_found:ruby-dev": 1,
      # "stderr.error:could_not_open_file/var/lib/pacman/local/ldb-#:#-#/desc:Not_a_directory": 1,
      if stat.match(/stderr\./i)
        job['summary.any_stderr'] = 1
        summary_result = 'stderr'
        next
      end

      # sum.stats.pkgbuild.mb_cache.c:warning:‘read_cache’defined-but-not-used[-Wunused-function]: 1
      # sum.stats.pkgbuild.mb_cache.c:warning:control-reaches-end-of-non-void-function[-Wreturn-type]: 1
      if stat.match(/:warning:|\.warning$/i)
        job['summary.any_warning'] = 1
        summary_result = 'warning'
      end

      # "last_state.test.iperf.exit_code.127": 1,
      # "last_state.test.cci-makepkg.exit_code.1": 1,
      # sum.stats.pkgbuild.cc1plus:error:unrecognized-command-line-option‘-Wno-unknown-warning-option’[-Werror]: 2
      if stat.match(/:error:|\.error$|\.exit_code\./i)
        job['summary.any_error'] = 1
        summary_result = 'error'
      end

      if stat.match(/\.fail$/i)
        job['summary.any_fail'] = 1
        summary_result = 'fail'
      end
    end
    return unless summary_result.empty?

    job['summary.success'] = 1
  end

  # set jobs summary fields information in place
  def set_jobs_summary
    @jobs.each do |job|
      stats = job['stats']
      next unless stats

      set_job_summary(stats, job)
    end
  end

  def get_all_metrics(jobs)
    metrics = []
    jobs.each do |job|
      stats = job['stats']
      next unless stats

      metrics.concat(stats.keys)
    end
    metrics.uniq
  end

  #初始化结果统计表
  def initialize_result_hash(metrics)
    result = {
      'kvcount' => {},
      'raw.id' => {},
      'sum.stats' => {},
      'raw.stats' => {},
      'avg.stats' => {},
      'max.stats' => {},
      'min.stats' => {}
    }
    metrics.each { |metric| result['raw.stats'][metric] = [] }
    result
  end

  def set_default_value(result, stats, metrics)
    left_metrics = metrics - stats.keys
    left_metrics.each { |metric| result['raw.stats'][metric] }

    stats.each do |key, value|
      result['raw.stats'][key] << value
    end
  end

  def kvcount(result, job)
    KEYWORD.each do |keyword|
      next unless job[keyword]

      result['kvcount']["#{keyword}=#{job[keyword]}"] ||= 0
      result['kvcount']["#{keyword}=#{job[keyword]}"] += 1
      result['raw.id']["[#{keyword}=#{job[keyword]}]"] ||= []
      result['raw.id']["[#{keyword}=#{job[keyword]}]"] << job['id']
    end
  end

  def assemble_element(key, value, result)
    if key.end_with?('.element')
      value.each do |one_value_array|
        one_value_array.each do |element|
          result['sum.stats']["#{key}: #{element}"] ||= 0
          result['sum.stats']["#{key}: #{element}"] += 1
        end
      end
    else
      result['avg.stats'][key] = value.compact.sum / value.compact.size.to_f
      result['max.stats'][key] = value.compact.max
      result['min.stats'][key] = value.compact.min
    end
    result
  end

  def stats_count(result)
    result['raw.stats'].each do |key, value|
      next if key.end_with?('.message')

      if function_stat?(key)
        result['sum.stats'][key] = value.compact.size
      else
        result = assemble_element(key, value, result)
      end
    end
  end

  #对所有jobs状态统计
  def query_jobs_state(jobs)
    metrics = get_all_metrics(jobs)
    result = initialize_result_hash(metrics)
    jobs.each do |job|
      kvcount(result, job)
      stats = job['stats']
      next unless stats

      set_default_value(result, stats, metrics)
    end

    stats_count(result)
    result
  end

  #输出result为yaml
  def output_yaml(prefix, result)
    result.each do |key, value|
      prefix_key = if prefix.empty?
                     key.to_s
                   else
                     "#{prefix}.#{key}"
                   end

      if value.is_a? Hash
        output_yaml(prefix_key, value)
      else
        puts "#{prefix_key}: #{value.to_json}"
      end
    end
  end

  #输出结果入口:output
  def output
    output_yaml('', @result)
  end

  #生成统计结果result的入口
  def generate_result
    if @jobs.empty?
      puts "No query result is found"
      return
    end
    @result = query_jobs_state(@jobs)
    @result['kvcount'] = @result['kvcount'].sort.to_h
    @result['raw.id'] = @result['raw.id'].sort.to_h
    @result
  end
end

